Skip to content

Conversation

@mtrofin
Copy link
Member

@mtrofin mtrofin commented Oct 22, 2025

The idioms are described in https://reviews.llvm.org/D102116 and https://reviews.llvm.org/D92754. In both cases, the way the loop is expressed changes, without changing its iteration count, which means we can reuse the original loop's branch probabilities.

Issue #147390

@llvmbot
Copy link
Member

llvmbot commented Oct 22, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Mircea Trofin (mtrofin)

Changes

The


Full diff: https://github.com/llvm/llvm-project/pull/164523.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (+36-4)
  • (added) llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll (+70)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
 STATISTIC(NumShiftUntilZero,
           "Number of uncountable loops recognized as 'shift until zero' idiom");
 
+namespace llvm {
 bool DisableLIRP::All;
 static cl::opt<bool, true>
     DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt<bool> ForceMemsetPatternIntrinsic(
     cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
     cl::Hidden);
 
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
 namespace {
 
 class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
   // The loop trip count check.
   auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
                                        CurLoop->getName() + ".ivcheck");
-  Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector<uint32_t> BranchWeights;
+  const bool HasBranchWeights =
+      !ProfcheckDisableMetadataFixes &&
+      extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+    if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+      std::swap(BranchWeights[0], BranchWeights[1]);
+    // We're not changing the loop profile, so we can reuse the original loop's
+    // profile.
+    setBranchWeights(*BI, BranchWeights,
+                     /*IsExpected=*/false);
+  }
+
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
 ///     %start = <...>
 ///     %extraoffset = <...>
 ///     <...>
-///     br label %for.cond
+///     br label %loop
 ///
 ///   loop:
-///     %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+///     %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
 ///     %nbits = add nsw i8 %iv, %extraoffset
 ///     %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
 ///     %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
 
   // The loop terminator.
   Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
-  Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector<uint32_t> BranchWeights;
+  const bool HasBranchWeights =
+      !ProfcheckDisableMetadataFixes &&
+      extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+    if (InvertedCond)
+      std::swap(BranchWeights[0], BranchWeights[1]);
+    // We're not changing the loop profile, so we can reuse the original loop's
+    // profile.
+    setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+  }
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0000000000000..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt -passes="module(print<block-freq>),function(loop(loop-idiom)),module(print<block-freq>)" -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck --check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nsw i8 %iv, %extraoffset
+  %val.shifted = ashr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+  br i1 %val.shifted.iszero, label %end, label %loop, !prof !{!"branch_weights", i32 1, i32 1000 }
+
+end:
+  %iv.res = phi i8 [ %iv, %loop ]
+  %nbits.res = phi i8 [ %nbits, %loop ]
+  %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+  %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+  %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+  call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+  ret i8 %iv.res
+}
+
+define i32 @p1(i32 %x, i32 %bit) {
+entry:
+  %bitmask = shl i32 1, %bit
+  br label %loop
+
+loop:
+  %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
+  %x.curr.bitmasked = and i32 %x.curr, %bitmask
+  %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
+  %x.next = shl i32 %x.curr, 1
+  br i1 %x.curr.isbitunset, label %loop, label %end, !prof !{!"branch_weights", i32 500, i32 1 }
+
+end:
+  ret i32 %x.curr
+}
+
+;
+; PROFILE: Printing analysis results of BFI for function 'p0':
+; PROFILE: block-frequency-info: p0
+; PROFILE: - entry: float = 1.0,
+; PROFILE:  - loop: float = 1001.0,
+; PROFILE: - end: float = 1.0,
+; PROFILE: block-frequency-info: p1
+; PROFILE: - entry: float = 1.0, 
+; PROFILE: - loop: float = 501.0,
+; PROFILE: - end: float = 1.0,
+; PROFILE: block-frequency-info: p0
+; PROFILE: - entry: float = 1.0,
+; PROFILE:  - loop: float = 1001.0,
+; PROFILE: - end: float = 1.0,
+; PROFILE: block-frequency-info: p1
+; PROFILE: - entry: float = 1.0, 
+; PROFILE: - loop: float = 501.0,
+; PROFILE: - end: float = 1.0,

@mtrofin mtrofin requested a review from LebedevRI October 22, 2025 14:55
Copy link
Member Author

mtrofin commented Oct 22, 2025

@jdenny-ornl - I think I should also propagate the llvm.loop.estimated_trip_count, correct?

@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from 7b55a08 to 577971f Compare October 29, 2025 15:02
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from 72e8716 to 1bd2ba5 Compare October 29, 2025 15:02
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from 577971f to e745aaf Compare October 30, 2025 05:30
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch 2 times, most recently from aafb258 to 6486fed Compare October 30, 2025 15:09
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from e745aaf to 452c2f5 Compare October 30, 2025 15:09
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from 452c2f5 to ac02f48 Compare October 30, 2025 17:41
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch 2 times, most recently from f9110cf to a731249 Compare October 31, 2025 22:47
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch 2 times, most recently from 9d2017c to 19c883d Compare October 31, 2025 22:52
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from a731249 to 23cbeab Compare October 31, 2025 22:52
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from 19c883d to a5d8823 Compare November 3, 2025 18:56
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from 23cbeab to 648356d Compare November 3, 2025 18:56
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from e403ca4 to d2a0486 Compare November 4, 2025 02:12
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from 7a7a7f2 to 3cbf2c9 Compare November 4, 2025 02:15
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from d2a0486 to 3620cd8 Compare November 4, 2025 02:15
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from 3cbf2c9 to b1dfb82 Compare November 4, 2025 16:07
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch 2 times, most recently from 0037b3e to c5282b7 Compare November 4, 2025 19:01
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch 2 times, most recently from 7142f46 to 057f252 Compare November 4, 2025 21:01
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch 2 times, most recently from b095044 to 369240c Compare November 5, 2025 00:41
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch 2 times, most recently from 2e01505 to 9ddf571 Compare November 5, 2025 00:44
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from 369240c to 6cdaeb8 Compare November 5, 2025 00:45
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from 9ddf571 to 1f30253 Compare November 5, 2025 01:25
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from 6cdaeb8 to 71a68e9 Compare November 5, 2025 01:26
Copy link
Contributor

@jinhuang1102 jinhuang1102 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from 1f30253 to af190a9 Compare November 5, 2025 16:47
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from 71a68e9 to a4e7fae Compare November 5, 2025 16:47
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from af190a9 to 0e19741 Compare November 5, 2025 16:49
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from a4e7fae to d41e0be Compare November 5, 2025 16:49
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector branch from 0e19741 to e19f34d Compare November 5, 2025 19:22
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from d41e0be to 38069a3 Compare November 5, 2025 19:22
Base automatically changed from users/mtrofin/10-21-_lver_profcheck_explicitly_set_unknown_branch_weights_for_the_versioned/unversioned_selector to main November 5, 2025 20:13
@mtrofin mtrofin force-pushed the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch from 38069a3 to 075b8ff Compare November 5, 2025 20:15
Copy link
Member Author

mtrofin commented Nov 5, 2025

@jdenny-ornl I'll merge this, and can follow up with the llvm.loop.estimated_trip_count propagation if that is the right thing to do (somewhat arguably separate change anyway)

Copy link
Member Author

mtrofin commented Nov 5, 2025

Merge activity

  • Nov 5, 9:38 PM UTC: A user started a stack merge that includes this pull request via Graphite.
  • Nov 5, 9:39 PM UTC: @mtrofin merged this pull request with Graphite.

@mtrofin mtrofin merged commit 5e46103 into main Nov 5, 2025
8 of 9 checks passed
@mtrofin mtrofin deleted the users/mtrofin/10-21-_lir_profcheck_reuse_the_loop_s_exit_condition_profile branch November 5, 2025 21:39
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants